[HVM] Control qemu's state-save via xenstore, instead of SIGUSR1
author Tim Deegan <Tim.Deegan@xensource.com>
Tue, 24 Jul 2007 13:52:16 +0000 (14:52 +0100)
committer Tim Deegan <Tim.Deegan@xensource.com>
Tue, 24 Jul 2007 13:52:16 +0000 (14:52 +0100)
This lets us verify that qemu has indeed stopped processing before
we start saving guest memory.  Also allow qemu to continue processing
after the save has happened, instead of exiting immediately.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
tools/ioemu/hw/cirrus_vga.c
tools/ioemu/target-i386-dm/helper2.c
tools/ioemu/vl.c
tools/ioemu/vl.h
tools/ioemu/xenstore.c
tools/python/xen/xend/XendCheckpoint.py
tools/python/xen/xend/XendDomainInfo.py
tools/python/xen/xend/image.py

index 96f7cbb007c24c2bbe84c8021d1ca21f5908905c..fb2f3ae556819bf8200ca663473e4d04c256fe2f 100644 (file)
@@ -3096,8 +3096,6 @@ static void cirrus_vga_save(QEMUFile *f, void *opaque)
     qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_addr);
     qemu_put_be64s(f, (uint64_t*)&s->cirrus_lfb_end);
     qemu_put_buffer(f, s->vram_ptr, VGA_RAM_SIZE); 
-    if (vga_acc)
-        cirrus_stop_acc(s);
 }
 
 static int cirrus_vga_load(QEMUFile *f, void *opaque, int version_id)
index 971ff257dbfdfc93340f7f32bc5f0e12a2314b79..7b4dcca6870d8e1a80bde98e68d191db84ae460a 100644 (file)
@@ -618,6 +618,7 @@ int main_loop(void)
     CPUState *env = cpu_single_env;
     int evtchn_fd = xc_evtchn_fd(xce_handle);
     char qemu_file[PATH_MAX];
+    fd_set fds;
 
     buffered_io_timer = qemu_new_timer(rt_clock, handle_buffered_io,
                                       cpu_single_env);
@@ -625,19 +626,34 @@ int main_loop(void)
 
     qemu_set_fd_handler(evtchn_fd, cpu_handle_ioreq, NULL, env);
 
-    while (!(vm_running && suspend_requested))
-        /* Wait up to 10 msec. */
-        main_loop_wait(10);
-
-    fprintf(logfile, "device model received suspend signal!\n");
-
-    /* Pull all outstanding ioreqs through the system */
-    handle_buffered_io(env);
-    main_loop_wait(1); /* For the select() on events */
+    xenstore_record_dm_state("running");
+    while (1) {
+        while (!(vm_running && suspend_requested))
+            /* Wait up to 10 msec. */
+            main_loop_wait(10);
+
+        xenstore_record_dm_state("paused");
+        fprintf(logfile, "device model saving state\n");
+
+        /* Pull all outstanding ioreqs through the system */
+        handle_buffered_io(env);
+        main_loop_wait(1); /* For the select() on events */
+
+        /* Save the device state */
+        snprintf(qemu_file, sizeof(qemu_file), 
+                 "/var/lib/xen/qemu-save.%d", domid);
+        do_savevm(qemu_file);
+
+        /* Wait to be allowed to continue */
+        while (suspend_requested) {
+            FD_ZERO(&fds);
+            FD_SET(xenstore_fd(), &fds);
+            if (select(xenstore_fd() + 1, &fds, NULL, NULL, NULL) > 0)
+                xenstore_process_event(NULL);
+        }
 
-    /* Save the device state */
-    snprintf(qemu_file, sizeof(qemu_file), "/var/lib/xen/qemu-save.%d", domid);
-    do_savevm(qemu_file);
+        xenstore_record_dm_state("running");
+    }
 
     return 0;
 }
index 41035ddec6b3929ea1898b85d53a2f910d56a6e1..b1c3cca009029a03a41e1d254db4985a24d50fba 100644 (file)
@@ -6856,15 +6856,6 @@ int set_mm_mapping(int xc_handle, uint32_t domid,
     return 0;
 }
 
-void suspend(int sig)
-{
-    fprintf(logfile, "suspend sig handler called with requested=%d!\n",
-            suspend_requested);
-    if (sig != SIGUSR1)
-        fprintf(logfile, "suspend signal dismatch, get sig=%d!\n", sig);
-    suspend_requested = 1;
-}
-
 #if defined(MAPCACHE)
 
 #if defined(__i386__) 
@@ -7057,6 +7048,7 @@ int main(int argc, char **argv)
     xen_pfn_t *page_array;
     extern void *buffered_pio_page;
 #endif
+    sigset_t set;
 
     char qemu_dm_logfilename[128];
     
@@ -7982,24 +7974,11 @@ int main(int argc, char **argv)
        close(fd);
     }
 
-    /* register signal for the suspend request when save */
-    {
-        struct sigaction act;
-        sigset_t set;
-        act.sa_handler = suspend;
-        act.sa_flags = SA_RESTART;
-        sigemptyset(&act.sa_mask);
-
-        sigaction(SIGUSR1, &act, NULL);
-
-        /* control panel mask some signals when spawn qemu, need unmask here*/
-        sigemptyset(&set);
-        sigaddset(&set, SIGUSR1);
-        sigaddset(&set, SIGTERM);
-        if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
-            fprintf(stderr, "unblock signal fail, possible issue for HVM save!\n");
-
-    }
+    /* Unblock SIGTERM, which may have been blocked by the caller */
+    sigemptyset(&set);
+    sigaddset(&set, SIGTERM);
+    if (sigprocmask(SIG_UNBLOCK, &set, NULL) == -1)
+        fprintf(stderr, "Failed to unblock SIGTERM\n");
 
     main_loop();
     quit_timers();
index 55ee261dc946d907e3adc6a964a0d1e57fbca6f0..80b88a50a411096465f08817b3994fcffb0cd273 100644 (file)
@@ -1456,6 +1456,7 @@ void readline_start(const char *prompt, int is_password,
 void xenstore_parse_domain_config(int domid);
 int xenstore_fd(void);
 void xenstore_process_event(void *opaque);
+void xenstore_record_dm_state(char *state);
 void xenstore_check_new_media_present(int timeout);
 void xenstore_write_vncport(int vnc_display);
 int xenstore_read_vncpasswd(int domid);
index ddadcb6a0fba32a5a75237c894c8247120f05cbd..37c535e7b5d93003fb191868a035b4b2514ae931 100644 (file)
@@ -186,6 +186,12 @@ void xenstore_parse_domain_config(int domid)
         fprintf(logfile, "Watching %s\n", buf);
     }
 
+    /* Set a watch for suspend requests from the migration tools */
+    if (pasprintf(&buf, 
+                  "/local/domain/0/device-model/%u/command", domid) != -1) {
+        xs_watch(xsh, buf, "dm-command");
+        fprintf(logfile, "Watching %s\n", buf);
+    }
 
  out:
     free(type);
@@ -310,6 +316,52 @@ void xenstore_process_logdirty_event(void)
 }
 
 
+/* Accept state change commands ("save"/"continue") from the control tools */
+static void xenstore_process_dm_command_event(void)
+{
+    char *path = NULL, *command = NULL;
+    unsigned int len;
+    extern int suspend_requested;
+
+    if (pasprintf(&path, 
+                  "/local/domain/0/device-model/%u/command", domid) == -1) {
+        fprintf(logfile, "out of memory reading dm command\n");
+        goto out;
+    }
+    command = xs_read(xsh, XBT_NULL, path, &len);
+    if (!command)
+        goto out;
+    /* Compare whole words: strncmp() over len alone accepts "" or a prefix */
+    if (len == strlen("save") && !strncmp(command, "save", len)) {
+        fprintf(logfile, "dm-command: pause and save state\n");
+        suspend_requested = 1;
+    } else if (len == strlen("continue") && !strncmp(command, "continue", len)) {
+        fprintf(logfile, "dm-command: continue after state save\n");
+        suspend_requested = 0;
+    } else {
+        fprintf(logfile, "dm-command: unknown command\"%.*s\"\n", (int)len, command);
+    }
+
+ out:
+    free(path);
+    free(command);
+}
+
+/* Write the given state string to the device-model "state" node for domid
+ * in xenstore.  Failures are only logged; nothing is returned. */
+void xenstore_record_dm_state(char *state)
+{
+    char *node = NULL;
+
+    if (pasprintf(&node,
+                  "/local/domain/0/device-model/%u/state", domid) == -1)
+        fprintf(logfile, "out of memory recording dm state\n");
+    else if (!xs_write(xsh, XBT_NULL, node, state, strlen(state)))
+        fprintf(logfile, "error recording dm state\n");
+
+    /* Single exit: release the path buffer on every route */
+    free(node);
+}
 
 void xenstore_process_event(void *opaque)
 {
@@ -325,6 +377,11 @@ void xenstore_process_event(void *opaque)
         goto out;
     }
 
+    if (!strcmp(vec[XS_WATCH_TOKEN], "dm-command")) {
+        xenstore_process_dm_command_event();
+        goto out;
+    }
+
     if (strncmp(vec[XS_WATCH_TOKEN], "hd", 2) ||
         strlen(vec[XS_WATCH_TOKEN]) != 3)
         goto out;
index 1aa7d7f69de69fc614b6edbcc173721f7a3e1cae..47b4cfba13e5f62dcea6c652461a46dc9437089b 100644 (file)
@@ -98,10 +98,6 @@ def save(fd, dominfo, network, live, dst, checkpoint=False):
                 log.info("Domain %d suspended.", dominfo.getDomid())
                 dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP3,
                                        domain_name)
-                #send signal to device model for save
-                if hvm:
-                    log.info("release_devices for hvm domain")
-                    dominfo._releaseDevices(True)
                 tochild.write("done\n")
                 tochild.flush()
                 log.debug('Written done')
@@ -139,7 +135,7 @@ def save(fd, dominfo, network, live, dst, checkpoint=False):
     except Exception, exn:
         log.exception("Save failed on domain %s (%s).", domain_name,
                       dominfo.getDomid())
-
+        
         dominfo.resumeDomain()
         log.debug("XendCheckpoint.save: resumeDomain")
 
index eb5c57556cff6c11b0bdc254c2d8de562f7b7da5..3ce18bc96fa65ef5af41e30cf89c7476d64853a0 100644 (file)
@@ -1122,6 +1122,8 @@ class XendDomainInfo:
                     self._clearRestart()
 
                     if reason == 'suspend':
+                        if self._stateGet() != DOM_STATE_SUSPENDED:
+                            self.image.saveDeviceModel()
                         self._stateSet(DOM_STATE_SUSPENDED)
                         # Don't destroy the domain.  XendCheckpoint will do
                         # this once it has finished.  However, stop watching
@@ -1764,6 +1766,9 @@ class XendDomainInfo:
             ResumeDomain(self.domid)
         except:
             log.exception("XendDomainInfo.resume: xc.domain_resume failed on domain %s." % (str(self.domid)))
+        if self.is_hvm():
+            self.image.resumeDeviceModel()
+
 
     #
     # Channels for xenstore and console
index 28f100bedc55c4488239f2706d6e20c732088faf..bd5e6b2cf4daf37cd628b00560a268ab1ca2a76c 100644 (file)
@@ -20,6 +20,7 @@
 import os, string
 import re
 import math
+import time
 import signal
 
 import xen.lowlevel.xc
@@ -27,6 +28,7 @@ from xen.xend.XendConstants import REVERSE_DOMAIN_SHUTDOWN_REASONS
 from xen.xend.XendError import VmError, XendError, HVMRequired
 from xen.xend.XendLogging import log
 from xen.xend.XendOptions import instance as xenopts
+from xen.xend.xenstore.xstransact import xstransact
 from xen.xend.xenstore.xswatch import xswatch
 from xen.xend import arch
 
@@ -175,6 +177,14 @@ class ImageHandler:
         """Create device model for the domain (define in subclass if needed)."""
         pass
     
+    def saveDeviceModel(self):
+        """Save device model state for the domain (no-op here; define in subclass if needed)."""
+        pass
+
+    def resumeDeviceModel(self):
+        """Unpause device model for the domain (no-op here; define in subclass if needed)."""
+        pass
+
     def destroy(self):
         """Extra cleanup on domain destroy (define in subclass if needed)."""
         pass
@@ -443,17 +453,34 @@ class HVMImageHandler(ImageHandler):
         self.vm.storeDom("image/device-model-pid", self.pid)
         log.info("device model pid: %d", self.pid)
 
+    def saveDeviceModel(self):
+        """Tell the device model to pause and save, then wait for 'paused'.
+
+        Raises VmError if 'paused' is not read within 101 polls (~10s)."""
+        dm_path = "/local/domain/0/device-model/%i" % self.vm.getDomid()
+        xstransact.Store(dm_path, ('command', 'save'))
+        # Wait for confirmation.  Could do this with a watch but we'd
+        # still end up spinning here waiting for the watch to fire.
+        for attempt in range(101):
+            # Test before sleeping so that success is neither delayed by
+            # the sleep nor turned into a timeout on the final poll.
+            if xstransact.Read(dm_path + "/state") == 'paused':
+                return
+            time.sleep(0.1)
+        raise VmError('Timed out waiting for device model to save')
+
+    def resumeDeviceModel(self):
+        """Tell the device model to carry on after pausing to save."""
+        dm_path = "/local/domain/0/device-model/%i" % self.vm.getDomid()
+        xstransact.Store(dm_path, ('command', 'continue'))
+
     def recreate(self):
         self.pid = self.vm.gatherDom(('image/device-model-pid', int))
 
     def destroy(self, suspend = False):
-        if self.pid:
+        if self.pid and not suspend:
             try:
-                sig = signal.SIGKILL
-                if suspend:
-                    log.info("use sigusr1 to signal qemu %d", self.pid)
-                    sig = signal.SIGUSR1
-                os.kill(self.pid, sig)
+                os.kill(self.pid, signal.SIGKILL)
             except OSError, exn:
                 log.exception(exn)
             try:
@@ -464,6 +491,8 @@ class HVMImageHandler(ImageHandler):
                 # but we can't wait for it because it's not our child.
                 pass
             self.pid = None
+            state = xstransact.Remove("/local/domain/0/device-model/%i"
+                                      % self.vm.getDomid())
 
 
 class IA64_HVM_ImageHandler(HVMImageHandler):
@@ -507,6 +536,7 @@ class X86_HVM_ImageHandler(HVMImageHandler):
         return max(4 * (256 * self.vm.getVCpuCount() + 2 * (maxmem_kb / 1024)),
                    shadow_mem_kb)
 
+
 class X86_Linux_ImageHandler(LinuxImageHandler):
 
     def buildDomain(self):